knitr::opts_chunk$set(echo = TRUE, message=F, warning=F, fig.width = 14, fig.height = 5)
library(readr)
library(sf)
## Linking to GEOS 3.13.1, GDAL 3.11.0, PROJ 9.6.0; sf_use_s2() is TRUE
library(leaflet)
library(rstudioapi)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(patchwork)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

Set working directory to the place this script is saved:

# Getting the path of your current open file
current_path = rstudioapi::getActiveDocumentContext()$path 
setwd(dirname(current_path))

Load our data:

# load data that has the dates the historic districts were designated
# comes from here: https://planning.dc.gov/page/dc-historic-districts
# hd_data <- readr::read_csv("https://docs.google.com/spreadsheets/d/1Ajl1iAS0NRB7vk_UFDveeWzGkwf3tuiDo-zV9_wtzRM/gviz/tq?tqx=out:csv&sheet=data")
hd_data <- readr::read_csv("hd_data/data.csv")

# load the historic district boundary shape files
# comes from here: https://opendata.dc.gov/datasets/DCGIS::historic-districts/about 
hd_shp <- sf::st_read("Historic_Districts/Historic_Districts.shp")
## Reading layer `Historic_Districts' from data source 
##   `C:\Users\edwar\Documents\GitHub\hd_analysis\Historic_Districts\Historic_Districts.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 73 features and 18 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -8584936 ymin: 4696608 xmax: -8564736 ymax: 4720410
## Projected CRS: WGS 84 / Pseudo-Mercator
# load the 2022 ward shape files
# comes from here: https://opendata.dc.gov/datasets/DCGIS::wards-from-2022/about
ward_shp <- sf::st_read("Wards_from_2022/Wards_from_2022.shp")
## Reading layer `Wards_from_2022' from data source 
##   `C:\Users\edwar\Documents\GitHub\hd_analysis\Wards_from_2022\Wards_from_2022.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 8 features and 20 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -8584936 ymin: 4691870 xmax: -8561487 ymax: 4721094
## Projected CRS: WGS 84 / Pseudo-Mercator
# load the zoning map:
# comes from here: https://opendata.dc.gov/datasets/DCGIS::zoning-boundaries-zoning-regulations-of-2016/about
zone_shp <- sf::st_read("zoning/Zoning_Boundaries_(Zoning_Regulations_of_2016).shp")
## Reading layer `Zoning_Boundaries_(Zoning_Regulations_of_2016)' from data source 
##   `C:\Users\edwar\Documents\GitHub\hd_analysis\zoning\Zoning_Boundaries_(Zoning_Regulations_of_2016).shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 953 features and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -8584936 ymin: 4691870 xmax: -8561487 ymax: 4721094
## Projected CRS: WGS 84 / Pseudo-Mercator
# load & clean census tract data
# please see https://opendata.dc.gov/datasets/DCGIS::census-tracts-in-1970/about
# for example, for more details on variable names
load_clean_tracts <- function(geo_id_var, black_var, white_var, totpop_var, year) {
  # Loads the shapefile, removes unneeded columns, calculates the tract area in meters^2
  shp <- sf::st_read(paste0("tract_data/Census_Tracts_in_", year, 
                            "/Census_Tracts_in_", year, ".shp")) 
  shp <- shp %>% 
    rename("geo_id" = !!sym(geo_id_var),
           "n_black" = !!sym(black_var),
           "n_white" = !!sym(white_var),
           "n_tot" = !!sym(totpop_var)
           ) %>%
    select("geo_id", starts_with("n_")) %>%
    mutate(n_other = n_tot - (n_black + n_white),
           year = year)
  shp$geo_area_meters <- sf::st_area(shp)
  shp <- sf::st_transform(shp, 4326)
  
  return(shp %>% select("year", "geo_id", "n_tot", "n_black", "n_white", "n_other", "geo_area_meters", "geometry"))
}
t60_shp <- load_clean_tracts("GISJOIN", "B58013", "B58011", "CA4001", 1960)
t70_shp <- load_clean_tracts("GISJOIN", "CEB03", "CEB01", "CY7001", 1970) 
t80_shp <- load_clean_tracts("GISJOIN", "C9D003", "C9D001", "C7L001", 1980)
t90_shp <- load_clean_tracts("TRACTNO", "BLACK", "WHITE", "POPULATION", 1990)
t00_shp <- load_clean_tracts("TRACTNO", "BLACK", "WHITE", "TOTAL", 2000)
t10_shp <- load_clean_tracts("TRACT", "P0010004", "P0010003", "P0010001", 2010)
t20_shp <- load_clean_tracts("TRACT", "P0010004", "P0010003", "P0010001", 2020)
gc()

Merge HD data onto HD shapefile, subset to only look at neighborhood HDs:

hd_shp <- dplyr::left_join(x = hd_shp, y = hd_data, by = "UNIQUEID")
hd_shp <- hd_shp[hd_shp$Neighborhood_HD==1,]

Transform shape files to mercator projection:

# convert to mercator projection
zone_shp <- sf::st_transform(zone_shp, 4326)
hd_shp <- sf::st_transform(hd_shp, 4326)
ward_shp <- sf::st_transform(ward_shp, 4326)

Subset the zoning map to just look at residential zones:

# list zones:
zones_list <- sort(unique(zone_shp$ZR16))
housing_zones <- zones_list[grep(x=zones_list, pattern = "^R|^MU")]
# subset
zone_shp <- zone_shp[zone_shp$ZR16 %in% housing_zones,]

# create simplified labels
zone_shp$ZR16_simple <- "Other"
zone_shp$ZR16_simple[grep(x=zone_shp$ZR16, pattern="^RA-")] <- "Apartment zones"
zone_shp$ZR16_simple[grep(x=zone_shp$ZR16, pattern="^R-")] <- "Residential zones"
zone_shp$ZR16_simple[grep(x=zone_shp$ZR16, pattern="^RF-")] <- "Residential flat zones"
zone_shp$ZR16_simple[grep(x=zone_shp$ZR16, pattern="^MU-")] <- "Mixed use zones"

# show on a map:
factpal <- colorFactor(palette = "Set1", domain = zone_shp$ZR16_simple)

leaflet(zone_shp) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addPolygons(fillColor = ~factpal(ZR16_simple), # Apply the color function
              fillOpacity = 0.7,
              weight = 1,
              opacity = 1,
              color = "white",
              label=~ZR16,
              highlightOptions = highlightOptions(weight = 3,
                                                  color = "white",
                                                  bringToFront = TRUE
        )
  ) %>%
  addLegend(pal = factpal, values = ~ZR16_simple, opacity = 0.7, title = NULL,
    position = "bottomright")

Calculate the total amount of residential and MU land in DC:

# need to fix broken geometries:
fix_geo_if_broken <- function(shp) {
  if (min(sf::st_is_valid(shp)) == 0) {
    print("Fixing geometry...")
    return(sf::st_make_valid(shp))
  } else {
      return(shp)
    }
  }

zone_shp <- fix_geo_if_broken(zone_shp)
## [1] "Fixing geometry..."
hd_shp <- fix_geo_if_broken(hd_shp)
ward_shp <- fix_geo_if_broken(ward_shp)
t60_shp <- fix_geo_if_broken(t60_shp)
t70_shp <- fix_geo_if_broken(t70_shp)
t80_shp <- fix_geo_if_broken(t80_shp)
t90_shp <- fix_geo_if_broken(t90_shp)
t00_shp <- fix_geo_if_broken(t00_shp)
t10_shp <- fix_geo_if_broken(t10_shp)
t20_shp <- fix_geo_if_broken(t20_shp)


zone_shp$area_meters <- sf::st_area(zone_shp)
zone_shp$area_acres <- as.vector(zone_shp$area_meters * 0.000247105)
total_zone_acres <- sum(zone_shp$area_acres, na.rm=T)

Total land area covered by HDs over time:

# get shape areas:
hd_shp$area_meters <- sf::st_area(hd_shp)
hd_shp$area_acres <- as.vector(hd_shp$area_meters * 0.000247105)

years <- c(1960, 1970, 1980, 1990, 2000, 2010, 2025)
land_areas <- rep(NA, length(years))
counter <- 1
for (year in years) { # Land area covered by HDs by year:
  land_area <- sum(hd_shp$area_acres[hd_shp$desig_date < year])
  land_areas[counter] <- land_area
  counter <- counter + 1
}

p <- data.frame(years, land_areas, round(100*land_areas / total_zone_acres,0))
names(p) <- c("Year", 
              "Area covered by by Historic Districts, in Acres",
              "Percent of 2016 residential zone covered by Neighborhood HD")
plot1 <-
  ggplot(p, 
       aes(x=Year, 
           y=`Area covered by by Historic Districts, in Acres`)) + 
  geom_bar(stat = "identity", fill="#0f9535") +
  geom_text(aes(label = round(`Area covered by by Historic Districts, in Acres`, 0), vjust = -1.7)) +
  ylab("Acres") +
  theme_minimal() +
  ggtitle('Acres covered by "neighborhood historic districts" has steadily increased over time')

plot2 <-
  ggplot(p, 
       aes(x=Year, 
           y=`Percent of 2016 residential zone covered by Neighborhood HD`)) + 
  geom_line(color="#0f9535", size=.75) +
  geom_point(color="#0f9535") +
  geom_text(aes(label = paste0(`Percent of 2016 residential zone covered by Neighborhood HD`, "%")), vjust = -1.7) +
  theme_minimal() +
  ylab("Percent") +
  ggtitle('And the % of residential area covered by HDs has more than doubled since 1980')

plot1 + plot2

Let’s quickly compare which HDs were designated before vs after 1980:

hd_shp$flag_1980 <- 0
hd_shp$flag_1980[hd_shp$desig_date < 1980] <- 1

leaflet() %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addPolygons(group= "Designated before 1980",
              data=hd_shp[hd_shp$flag_1980==1,],
              fillColor = "skyblue", 
              fillOpacity = 0.7,
              weight = 1,
              opacity = 1,
              color = "white",
              label=~LABEL,
              highlightOptions = highlightOptions(weight = 3,
                                                  color = "white",
                                                  bringToFront = FALSE
        ) 
  ) %>%
  addPolygons(group= "Designated after 1980",
              data=hd_shp[hd_shp$flag_1980==0,],
              fillColor = "hotpink", 
              fillOpacity = 0.7,
              weight = 1,
              opacity = 1,
              color = "white",
              label=~LABEL,
              highlightOptions = highlightOptions(weight = 3,
                                                  color = "white",
                                                  bringToFront = FALSE
        ) 
  ) %>%
  addLayersControl(
    overlayGroups = c("Designated before 1980", "Designated after 1980"),
    options = layersControlOptions(collapsed = FALSE)
  )

Now let’s look and see how HD changed over time, in terms of % black residents and % white residents, compared to nearby neighborhoods (tracts) that were not in HDs.

This will have a few steps:

  1. Take a given HD.
  2. Get the intersection of the HD with census tracts. If at least X% of the census tract area is in the HD, we’ll count it as part of the HD. (We’ll vary X as robustness check later)
  3. Create a buffer around the HD of distance Y (we’ll vary Y as a robustness check later). Run another intersection with the census tracts and that buffer. Count any tracts that touch the buffer but are not in an HD as “comparison” tracts.
  4. Note the % of white and black residents in each HD and those HDs comparison tracts.
  5. See how that changes over time.
# drop some very small HDs that are like a single circle
# just dropped anything smaller than 10 acres
hd_shp <-
  hd_shp %>%
  filter(!(LABEL %in% c("Emerald St HD",
                       "Grant Rd HD",
                       "Mount Vernon Triangle HD",
                       "Grant Circle HD",
                       "Union Market"
                       )))

get_geos_in_hd <- function(shp, min_pct, year) {
  # get the tracts that are in each HD in a given year
  
  i <- sf::st_intersection(x=shp, y=hd_shp)
  i$i_area <- sf::st_area(i)
  i$pct_of_geo_area <- as.vector(i$i_area / i$geo_area_meters)
  
  geos_in_hd <- i[i$pct_of_geo_area > min_pct, 
                    c("year", "geo_id", "LABEL",
                      "n_tot", "n_black", "n_white", "n_other",
                      "desig_date", "pct_of_geo_area")]
  geos_in_hd_summary <-
    geos_in_hd %>%
    mutate(n_tot_prorated = n_tot * pct_of_geo_area,
           n_black_prorated = n_black * pct_of_geo_area,
           n_white_prorated = n_white * pct_of_geo_area,
           n_other_prorated = n_other * pct_of_geo_area) %>%
    select("year", "LABEL", "geo_id", starts_with("n_"), "desig_date") %>%
    group_by(LABEL) %>%
      summarise(n_tot = sum(n_tot, na.rm=T),
                n_black = sum(n_black, na.rm=T),
                n_white = sum(n_white, na.rm=T),
                n_other = sum(n_other, na.rm=T),
                n_tot_prorated = sum(n_tot_prorated, na.rm=T),
                n_black_prorated = sum(n_black_prorated, na.rm=T),
                n_white_prorated = sum(n_white_prorated, na.rm=T),
                n_other_prorated = sum(n_other_prorated, na.rm=T),
                desig_year = max(desig_date, na.rm=T)) %>%
    mutate(desig_yet = ifelse(desig_year<year, 1, 0), 
           year = year)
  
    rv = list("geos_in_hd"=geos_in_hd, "summary"=sf::st_drop_geometry(geos_in_hd_summary))
    
    return(rv)
}

# turn off spherical geometry (s2)
sf_use_s2(FALSE)
# ranging the min % between .2 and .6 seems to give reasonable results
mp = 0.25 # TODO: run analysis varying this factor
hd_geos60 <- get_geos_in_hd(t60_shp, min_pct = mp, year = 1960)
hd_geos70 <- get_geos_in_hd(t70_shp, min_pct = mp, year = 1970)
hd_geos80 <- get_geos_in_hd(t80_shp, min_pct = mp, year = 1980)
hd_geos90 <- get_geos_in_hd(t90_shp, min_pct = mp, year = 1990)
hd_geos00 <- get_geos_in_hd(t00_shp, min_pct = mp, year = 2000)
hd_geos10 <- get_geos_in_hd(t10_shp, min_pct = mp, year = 2010)
hd_geos20 <- get_geos_in_hd(t20_shp, min_pct = mp, year = 2020)
gc()
##           used (Mb) gc trigger  (Mb) max used  (Mb)
## Ncells 1473426 78.7    2423300 129.5  2423300 129.5
## Vcells 4398425 33.6   10146329  77.5 10146321  77.5

Plot where the tracts “inside” the HDs were for 2020:

plot_geos_in_hds <- function(shp, geos_in_hds) {
  rv <-
    leaflet() %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addPolygons(group= "tracts labeled as in HDs",
              data=shp[shp$geo_id %in% geos_in_hds, ],
              fillColor = "skyblue", 
              fillOpacity = 0.7,
              weight = 1,
              opacity = 1,
              color = "white",
              label=~geo_id,
              highlightOptions = highlightOptions(weight = 3,
                                                  color = "white",
                                                  bringToFront = FALSE
        ) 
  ) %>%
  addPolygons(group= "HDs",
              data=hd_shp,
              fillColor = "hotpink", 
              fillOpacity = 0.7,
              weight = 1,
              opacity = 1,
              color = "white",
              label=~LABEL,
              highlightOptions = highlightOptions(weight = 3,
                                                  color = "white",
                                                  bringToFront = FALSE
        ) 
  ) %>%
  addLayersControl(
    overlayGroups = c("tracts labeled as in HDs", "HDs"),
    options = layersControlOptions(collapsed = FALSE)
  )
  
  return(rv)
}
plot_geos_in_hds(t60_shp, hd_geos60$geos_in_hd$geo_id)
plot_geos_in_hds(t70_shp, hd_geos70$geos_in_hd$geo_id)
plot_geos_in_hds(t80_shp, hd_geos80$geos_in_hd$geo_id)
plot_geos_in_hds(t90_shp, hd_geos90$geos_in_hd$geo_id)
plot_geos_in_hds(t00_shp, hd_geos00$geos_in_hd$geo_id)
plot_geos_in_hds(t10_shp, hd_geos10$geos_in_hd$geo_id)
plot_geos_in_hds(t20_shp, hd_geos20$geos_in_hd$geo_id)

Now get a list of neighboring tracts:

get_neighbor_geos <- function(hd_shp, geo_shp, buffer_dist, geos_in_hd, remove_geo_thresh, year) {
  # first make a buffer around the HDs
  b <- sf::st_buffer(hd_shp, dist = buffer_dist)
  # then get the intersection of the buffer and the tracts
  i <- sf::st_intersection(x=geo_shp, y=b)
  # remove tracts that have already been classified as within an HD
  i <- i[!(i$geo_id %in% geos_in_hd$geos_in_hd$geo_id),]
  # also remove any tracts/blocks for which more than X% of an HD is in that tract/block
  hd_shp$area_meters <- sf::st_area(hd_shp)
  hd_geo <- sf::st_intersection(x=geo_shp, y=hd_shp)
  hd_geo$intersect_area <- sf::st_area(hd_geo)
  hd_geo$pct_area <- as.vector(hd_geo$intersect_area / hd_geo$area_meters)
  geos_to_remove <- hd_geo$geo_id[hd_geo$pct_area > remove_geo_thresh]
  neighboring_geos <- i[!(i$geo_id %in% geos_to_remove),]
  
  # finally, remove
  neighboring_geos <- geo_shp[geo_shp$geo_id %in% neighboring_geos$geo_id,]
  neighboring_geos <- dplyr::left_join(neighboring_geos, 
                                         sf::st_drop_geometry(i[,c("geo_id", "LABEL", "desig_date")]),
                                         by="geo_id")
  
  neighbor_geos_summary <-
    sf::st_drop_geometry(neighboring_geos) %>%
    group_by(LABEL) %>%
      summarise(n_tot = sum(n_tot, na.rm=T),
                n_black = sum(n_black, na.rm=T),
                n_white = sum(n_white, na.rm=T),
                n_other = sum(n_other, na.rm=T),
                desig_year = max(desig_date, na.rm=T)) %>%
    mutate(desig_yet = ifelse(desig_year<year, 1, 0),
           year = year)
  
  rv = list("buffers" = b, 
            "neighbor_geos" = neighboring_geos, 
            "neighbor_geos_summary" = neighbor_geos_summary)
  
  return(rv)
}

buff_dist = .005
threshold = .1

nearby_tracts60 <- 
  get_neighbor_geos(hd_shp=hd_shp, geo_shp=t60_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos70,threshold, 1960)
nearby_tracts70 <- 
  get_neighbor_geos(hd_shp=hd_shp, geo_shp=t70_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos70,threshold, 1970)
nearby_tracts80 <- 
  get_neighbor_geos(hd_shp=hd_shp, geo_shp=t80_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos80,threshold, 1980)
nearby_tracts90 <- 
  get_neighbor_geos(hd_shp=hd_shp, geo_shp=t90_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos90,threshold, 1990)
gc()
##           used (Mb) gc trigger  (Mb) max used  (Mb)
## Ncells 1474684 78.8    2423300 129.5  2423300 129.5
## Vcells 4855403 37.1   10146329  77.5 10146321  77.5
nearby_tracts00 <- 
  get_neighbor_geos(hd_shp=hd_shp, geo_shp=t00_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos00,threshold, 2000)
nearby_tracts10 <- 
  get_neighbor_geos(hd_shp=hd_shp, geo_shp=t10_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos10,threshold, 2010)
nearby_tracts20 <- 
  get_neighbor_geos(hd_shp=hd_shp, geo_shp=t20_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos20,threshold, 2020)
gc()
##           used (Mb) gc trigger  (Mb) max used  (Mb)
## Ncells 1475767 78.9    2423300 129.5  2423300 129.5
## Vcells 4913086 37.5   10146329  77.5 10146321  77.5
plot_neighbor_geos <- function(shp) {
  rv <-
    leaflet() %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addPolygons(group='buffers', data=shp[["buffers"]]) %>%
  addPolygons(group= "tracts labeled as near HDs",
              data=shp[["neighbor_geos"]],
              fillColor = "skyblue", 
              fillOpacity = 0.7,
              weight = 1,
              opacity = 1,
              color = "white",
              label=~paste0("Tract: ", geo_id, "; HD neighbor: ", LABEL),
              highlightOptions = highlightOptions(weight = 3,
                                                  color = "white",
                                                  bringToFront = FALSE
        ) 
  ) %>%
  addPolygons(group= "HDs",
              data=hd_shp[hd_shp$desig_date < 2025,],
              fillColor = "hotpink", 
              fillOpacity = 0.7,
              weight = 1,
              opacity = 1,
              color = "white",
              label=~LABEL,
              highlightOptions = highlightOptions(weight = 3,
                                                  color = "white",
                                                  bringToFront = FALSE
        ) 
  ) %>%
  addLayersControl(
    overlayGroups = c("tracts labeled as near HDs", "HDs", "buffers"),
    options = layersControlOptions(collapsed = FALSE)
  )
  
  return(rv)
}
plot_neighbor_geos(nearby_tracts60)
plot_neighbor_geos(nearby_tracts20)

Now compare the demographics of the HD tracts and their neighbors in each year:

hd_comp_df <- dplyr::bind_rows(hd_geos60[[2]], hd_geos70[[2]], hd_geos80[[2]], hd_geos90[[2]],
                            hd_geos00[[2]], hd_geos10[[2]], hd_geos20[[2]],)



near_comp_df <- dplyr::bind_rows(nearby_tracts60[[3]], nearby_tracts70[[3]],nearby_tracts80[[3]], nearby_tracts90[[3]],
                            nearby_tracts00[[3]], nearby_tracts10[[3]], nearby_tracts20[[3]],)

hd_comp_df <-
  hd_comp_df %>%
  select(-ends_with("_prorated"), -desig_year) %>%
  group_by(LABEL, desig_yet, year) %>%
  summarise_all(., sum) %>%
  mutate(pct_black = n_black / n_tot,
         pct_white = n_white / n_tot) %>%
  rename_with(~ paste0(., "_hd"))

near_comp_df <-
  near_comp_df %>%
  select(-desig_year) %>%
  group_by(LABEL, desig_yet, year) %>%
  mutate(pct_black = n_black / n_tot,
         pct_white = n_white / n_tot) %>%
  summarise_all(., sum) %>%
  rename_with(~ paste0(., "_near"))

comp_df <- dplyr::full_join(x = hd_comp_df, 
                            y = near_comp_df, 
                            by = c("LABEL_hd"="LABEL_near", 
                                   "desig_yet_hd"="desig_yet_near",
                                   "year_hd"="year_near")) %>%
  select(starts_with(c("LABEL", "desig_yet", "year", "pct_")))

comp_df <-
  comp_df %>%
  tidyr::pivot_longer(
    cols = starts_with("pct_"),
    names_to = c("group", "treatment_control"),
    names_pattern = "pct_([a-zA-Z]+)_([a-zA-Z]+)",
    values_to = "percent")

# remove HDs that are always HDs or always not HDs during the timeframe
comp_df <-
  comp_df %>%
  group_by(LABEL_hd) %>%
  mutate(mean_status = mean(desig_yet_hd, na.rm=T)) %>%
  filter(mean_status > 0) %>%
  filter(mean_status < 1)

comp_df$treated <- 0
comp_df$treated[comp_df$treatment_control=="hd"] <- 1

# diff in diff analysis for change in the % of black residents
diff_in_diff <- lm(percent ~                 # outcome: % of white or black residents
                     treated +               # "treatment": whether tract is in a HD or not
                     desig_yet_hd +          # pre/post indicator: whether HD was designated yet
                     treated:desig_yet_hd +  # D-in-D estimator: effect of treatment after implemented
                     as.factor(LABEL_hd)  +  # fixed effect for HD area
                     as.factor(year_hd),     # fixed effect for year
                   data = comp_df[comp_df$group=="black",])

summary(diff_in_diff)
## 
## Call:
## lm(formula = percent ~ treated + desig_yet_hd + treated:desig_yet_hd + 
##     as.factor(LABEL_hd) + as.factor(year_hd), data = comp_df[comp_df$group == 
##     "black", ])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.51441 -0.08561  0.00642  0.10008  0.32372 
## 
## Coefficients:
##                                                   Estimate Std. Error t value
## (Intercept)                                       0.982139   0.050620  19.402
## treated                                          -0.032361   0.028279  -1.144
## desig_yet_hd                                     -0.045030   0.034295  -1.313
## as.factor(LABEL_hd)Blagden Alley/Naylor Court HD -0.179310   0.071455  -2.509
## as.factor(LABEL_hd)Bloomingdale HD               -0.168882   0.061650  -2.739
## as.factor(LABEL_hd)Capitol Hill HD               -0.346544   0.058854  -5.888
## as.factor(LABEL_hd)Cleveland Park HD             -0.820924   0.059037 -13.905
## as.factor(LABEL_hd)Downtown HD                   -0.247801   0.072154  -3.434
## as.factor(LABEL_hd)Dupont Circle HD              -0.763644   0.059926 -12.743
## as.factor(LABEL_hd)Financial HD                  -0.613816   0.154058  -3.984
## as.factor(LABEL_hd)Foggy Bottom HD               -0.886229   0.074670 -11.869
## as.factor(LABEL_hd)Foxhall HD                    -0.933840   0.072154 -12.942
## as.factor(LABEL_hd)Georgetown HD                 -0.842525   0.059026 -14.274
## as.factor(LABEL_hd)Greater 14th St HD            -0.443055   0.072443  -6.116
## as.factor(LABEL_hd)Greater U St HD               -0.239301   0.059571  -4.017
## as.factor(LABEL_hd)GWU / Old West End HD         -0.900888   0.076070 -11.843
## as.factor(LABEL_hd)Kalorama Triangle HD          -0.692670   0.059037 -11.733
## as.factor(LABEL_hd)Kingman Park HD               -0.052059   0.062752  -0.830
## as.factor(LABEL_hd)Lafayette Square HD           -0.911952   0.079354 -11.492
## as.factor(LABEL_hd)LeDroit Park HD               -0.152142   0.071058  -2.141
## as.factor(LABEL_hd)Logan Circle HD               -0.209050   0.074684  -2.799
## as.factor(LABEL_hd)Massachusetts Ave HD          -0.881518   0.074619 -11.814
## as.factor(LABEL_hd)Meridian Hill                 -0.408705   0.070015  -5.837
## as.factor(LABEL_hd)Mt. Pleasant HD               -0.491427   0.059037  -8.324
## as.factor(LABEL_hd)Mt. Vernon Square HD          -0.136578   0.060485  -2.258
## as.factor(LABEL_hd)Pennsylvania Ave NHS          -0.426778   0.066439  -6.424
## as.factor(LABEL_hd)Shaw HD                       -0.196403   0.059571  -3.297
## as.factor(LABEL_hd)Sheridan-Kalorama HD          -0.776764   0.059037 -13.157
## as.factor(LABEL_hd)Sixteenth St HD               -0.250765   0.071058  -3.529
## as.factor(LABEL_hd)Strivers' Section HD          -0.513097   0.075310  -6.813
## as.factor(LABEL_hd)Takoma Park HD                -0.281888   0.071088  -3.965
## as.factor(LABEL_hd)Washington Heights HD         -0.693408   0.063764 -10.875
## as.factor(LABEL_hd)Woodley Park HD               -0.807681   0.071455 -11.303
## as.factor(year_hd)1970                            0.079270   0.032357   2.450
## as.factor(year_hd)1980                            0.086808   0.033812   2.567
## as.factor(year_hd)1990                            0.051707   0.036592   1.413
## as.factor(year_hd)2000                            0.008481   0.040492   0.209
## as.factor(year_hd)2010                           -0.110781   0.043595  -2.541
## as.factor(year_hd)2020                           -0.196117   0.045568  -4.304
## treated:desig_yet_hd                             -0.098483   0.036399  -2.706
##                                                  Pr(>|t|)    
## (Intercept)                                       < 2e-16 ***
## treated                                          0.253546    
## desig_yet_hd                                     0.190355    
## as.factor(LABEL_hd)Blagden Alley/Naylor Court HD 0.012712 *  
## as.factor(LABEL_hd)Bloomingdale HD               0.006589 ** 
## as.factor(LABEL_hd)Capitol Hill HD               1.22e-08 ***
## as.factor(LABEL_hd)Cleveland Park HD              < 2e-16 ***
## as.factor(LABEL_hd)Downtown HD                   0.000693 ***
## as.factor(LABEL_hd)Dupont Circle HD               < 2e-16 ***
## as.factor(LABEL_hd)Financial HD                  8.83e-05 ***
## as.factor(LABEL_hd)Foggy Bottom HD                < 2e-16 ***
## as.factor(LABEL_hd)Foxhall HD                     < 2e-16 ***
## as.factor(LABEL_hd)Georgetown HD                  < 2e-16 ***
## as.factor(LABEL_hd)Greater 14th St HD            3.57e-09 ***
## as.factor(LABEL_hd)Greater U St HD               7.75e-05 ***
## as.factor(LABEL_hd)GWU / Old West End HD          < 2e-16 ***
## as.factor(LABEL_hd)Kalorama Triangle HD           < 2e-16 ***
## as.factor(LABEL_hd)Kingman Park HD               0.407540    
## as.factor(LABEL_hd)Lafayette Square HD            < 2e-16 ***
## as.factor(LABEL_hd)LeDroit Park HD               0.033210 *  
## as.factor(LABEL_hd)Logan Circle HD               0.005514 ** 
## as.factor(LABEL_hd)Massachusetts Ave HD           < 2e-16 ***
## as.factor(LABEL_hd)Meridian Hill                 1.60e-08 ***
## as.factor(LABEL_hd)Mt. Pleasant HD               5.14e-15 ***
## as.factor(LABEL_hd)Mt. Vernon Square HD          0.024786 *  
## as.factor(LABEL_hd)Pennsylvania Ave NHS          6.44e-10 ***
## as.factor(LABEL_hd)Shaw HD                       0.001116 ** 
## as.factor(LABEL_hd)Sheridan-Kalorama HD           < 2e-16 ***
## as.factor(LABEL_hd)Sixteenth St HD               0.000494 ***
## as.factor(LABEL_hd)Strivers' Section HD          6.80e-11 ***
## as.factor(LABEL_hd)Takoma Park HD                9.52e-05 ***
## as.factor(LABEL_hd)Washington Heights HD          < 2e-16 ***
## as.factor(LABEL_hd)Woodley Park HD                < 2e-16 ***
## as.factor(year_hd)1970                           0.014960 *  
## as.factor(year_hd)1980                           0.010816 *  
## as.factor(year_hd)1990                           0.158856    
## as.factor(year_hd)2000                           0.834269    
## as.factor(year_hd)2010                           0.011640 *  
## as.factor(year_hd)2020                           2.39e-05 ***
## treated:desig_yet_hd                             0.007275 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1457 on 256 degrees of freedom
##   (116 observations deleted due to missingness)
## Multiple R-squared:  0.8519, Adjusted R-squared:  0.8294 
## F-statistic: 37.77 on 39 and 256 DF,  p-value: < 2.2e-16
# diff in diff analysis for change in the % of white residents
diff_in_diff <- lm(percent ~                 # outcome: % of white or black residents
                     treated +               # "treatment": whether tract is in a HD or not
                     desig_yet_hd +          # pre/post indicator: whether HD was designated yet
                     treated:desig_yet_hd +  # D-in-D estimator: effect of treatment after implemented
                     as.factor(LABEL_hd)  +  # fixed effect for HD area
                     as.factor(year_hd),     # fixed effect for year
                   data = comp_df[comp_df$group=="white",])

summary(diff_in_diff)
## 
## Call:
## lm(formula = percent ~ treated + desig_yet_hd + treated:desig_yet_hd + 
##     as.factor(LABEL_hd) + as.factor(year_hd), data = comp_df[comp_df$group == 
##     "white", ])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.31063 -0.08385 -0.01052  0.07917  0.45470 
## 
## Coefficients:
##                                                   Estimate Std. Error t value
## (Intercept)                                       0.096334   0.047168   2.042
## treated                                           0.021254   0.026351   0.807
## desig_yet_hd                                      0.028035   0.031956   0.877
## as.factor(LABEL_hd)Blagden Alley/Naylor Court HD  0.106677   0.066583   1.602
## as.factor(LABEL_hd)Bloomingdale HD                0.121217   0.057446   2.110
## as.factor(LABEL_hd)Capitol Hill HD                0.309114   0.054841   5.637
## as.factor(LABEL_hd)Cleveland Park HD              0.741173   0.055011  13.473
## as.factor(LABEL_hd)Downtown HD                    0.171784   0.067234   2.555
## as.factor(LABEL_hd)Dupont Circle HD               0.656760   0.055839  11.762
## as.factor(LABEL_hd)Financial HD                   0.597421   0.143552   4.162
## as.factor(LABEL_hd)Foggy Bottom HD                0.769557   0.069578  11.060
## as.factor(LABEL_hd)Foxhall HD                     0.854456   0.067234  12.709
## as.factor(LABEL_hd)Georgetown HD                  0.773753   0.055001  14.068
## as.factor(LABEL_hd)Greater 14th St HD             0.324464   0.067503   4.807
## as.factor(LABEL_hd)Greater U St HD                0.148381   0.055509   2.673
## as.factor(LABEL_hd)GWU / Old West End HD          0.778624   0.070883  10.985
## as.factor(LABEL_hd)Kalorama Triangle HD           0.585505   0.055011  10.643
## as.factor(LABEL_hd)Kingman Park HD                0.028819   0.058473   0.493
## as.factor(LABEL_hd)Lafayette Square HD            0.795959   0.073943  10.765
## as.factor(LABEL_hd)LeDroit Park HD                0.079301   0.066212   1.198
## as.factor(LABEL_hd)Logan Circle HD                0.142204   0.069591   2.043
## as.factor(LABEL_hd)Massachusetts Ave HD           0.770931   0.069531  11.088
## as.factor(LABEL_hd)Meridian Hill                  0.230193   0.065241   3.528
## as.factor(LABEL_hd)Mt. Pleasant HD                0.323962   0.055011   5.889
## as.factor(LABEL_hd)Mt. Vernon Square HD           0.061670   0.056361   1.094
## as.factor(LABEL_hd)Pennsylvania Ave NHS           0.295259   0.061908   4.769
## as.factor(LABEL_hd)Shaw HD                        0.096201   0.055509   1.733
## as.factor(LABEL_hd)Sheridan-Kalorama HD           0.668434   0.055011  12.151
## as.factor(LABEL_hd)Sixteenth St HD                0.141060   0.066212   2.130
## as.factor(LABEL_hd)Strivers' Section HD           0.422460   0.070175   6.020
## as.factor(LABEL_hd)Takoma Park HD                 0.201627   0.066241   3.044
## as.factor(LABEL_hd)Washington Heights HD          0.566134   0.059416   9.528
## as.factor(LABEL_hd)Woodley Park HD                0.699163   0.066583  10.501
## as.factor(year_hd)1970                           -0.086209   0.030150  -2.859
## as.factor(year_hd)1980                           -0.117891   0.031507  -3.742
## as.factor(year_hd)1990                           -0.096791   0.034097  -2.839
## as.factor(year_hd)2000                           -0.156409   0.037731  -4.145
## as.factor(year_hd)2010                            0.007534   0.040622   0.185
## as.factor(year_hd)2020                            0.010962   0.042460   0.258
## treated:desig_yet_hd                              0.105528   0.033917   3.111
##                                                  Pr(>|t|)    
## (Intercept)                                      0.042141 *  
## treated                                          0.420664    
## desig_yet_hd                                     0.381145    
## as.factor(LABEL_hd)Blagden Alley/Naylor Court HD 0.110350    
## as.factor(LABEL_hd)Bloomingdale HD               0.035820 *  
## as.factor(LABEL_hd)Capitol Hill HD               4.57e-08 ***
## as.factor(LABEL_hd)Cleveland Park HD              < 2e-16 ***
## as.factor(LABEL_hd)Downtown HD                   0.011197 *  
## as.factor(LABEL_hd)Dupont Circle HD               < 2e-16 ***
## as.factor(LABEL_hd)Financial HD                  4.32e-05 ***
## as.factor(LABEL_hd)Foggy Bottom HD                < 2e-16 ***
## as.factor(LABEL_hd)Foxhall HD                     < 2e-16 ***
## as.factor(LABEL_hd)Georgetown HD                  < 2e-16 ***
## as.factor(LABEL_hd)Greater 14th St HD            2.62e-06 ***
## as.factor(LABEL_hd)Greater U St HD               0.007999 ** 
## as.factor(LABEL_hd)GWU / Old West End HD          < 2e-16 ***
## as.factor(LABEL_hd)Kalorama Triangle HD           < 2e-16 ***
## as.factor(LABEL_hd)Kingman Park HD               0.622528    
## as.factor(LABEL_hd)Lafayette Square HD            < 2e-16 ***
## as.factor(LABEL_hd)LeDroit Park HD               0.232149    
## as.factor(LABEL_hd)Logan Circle HD               0.042035 *  
## as.factor(LABEL_hd)Massachusetts Ave HD           < 2e-16 ***
## as.factor(LABEL_hd)Meridian Hill                 0.000496 ***
## as.factor(LABEL_hd)Mt. Pleasant HD               1.22e-08 ***
## as.factor(LABEL_hd)Mt. Vernon Square HD          0.274891    
## as.factor(LABEL_hd)Pennsylvania Ave NHS          3.11e-06 ***
## as.factor(LABEL_hd)Shaw HD                       0.084287 .  
## as.factor(LABEL_hd)Sheridan-Kalorama HD           < 2e-16 ***
## as.factor(LABEL_hd)Sixteenth St HD               0.034091 *  
## as.factor(LABEL_hd)Strivers' Section HD          6.01e-09 ***
## as.factor(LABEL_hd)Takoma Park HD                0.002579 ** 
## as.factor(LABEL_hd)Washington Heights HD          < 2e-16 ***
## as.factor(LABEL_hd)Woodley Park HD                < 2e-16 ***
## as.factor(year_hd)1970                           0.004596 ** 
## as.factor(year_hd)1980                           0.000226 ***
## as.factor(year_hd)1990                           0.004893 ** 
## as.factor(year_hd)2000                           4.62e-05 ***
## as.factor(year_hd)2010                           0.853017    
## as.factor(year_hd)2020                           0.796475    
## treated:desig_yet_hd                             0.002073 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1358 on 256 degrees of freedom
##   (116 observations deleted due to missingness)
## Multiple R-squared:  0.8459, Adjusted R-squared:  0.8224 
## F-statistic: 36.03 on 39 and 256 DF,  p-value: < 2.2e-16
plot_ly(comp_df[comp_df$group=="black",] %>% arrange(LABEL_hd, year_hd),
                   x = ~year_hd,
                   y = ~percent,
                   linetype = ~as.factor(treatment_control),
                   hovertext = ~paste0("Tract types: ", treatment_control, "\nDesignated yet: ", desig_yet_hd),
                   color = ~LABEL_hd, # Group lines by this variable
                   type = "scatter",
                   mode = "lines+markers") %>%
      layout(title = "% black residents in each historic district areas before and after HD created")
plot_ly(comp_df[comp_df$group=="white",] %>% arrange(LABEL_hd, year_hd),
                   x = ~year_hd,
                   y = ~percent,
                   linetype = ~as.factor(treatment_control),
                   hovertext = ~paste0("Tract types: ", treatment_control, "\nDesignated yet: ", desig_yet_hd),
                   color = ~LABEL_hd, # Group lines by this variable
                   type = "scatter",
                   mode = "lines+markers") %>%
      layout(title = "% white residents in each historic district areas before and after HD created")

Now we’re going to repeat all of that, but using block groups instead of tracts. We will have to rely on sightly different versions of the Census data to do this, downloaded from NHGIS rather than Open Data DC. We’re also only going back to 1970, since that’s the data I could find relatively easily.

Load and clean block data:

# b70_shp <- sf::st_read("block_shapes/US_block_1970/US_block_1970.shp")
# b70_shp <- b70_shp[b70_shp$STATE70=="11",]
# sf::st_write(b70_shp, "block_shapes/DC_block_1970/DC_block_1970.shp")

b70_shp <- sf::st_read("block_shapes/DC_block_1970/DC_block_1970.shp")
## Reading layer `DC_block_1970' from data source 
##   `C:\Users\edwar\Documents\GitHub\hd_analysis\block_shapes\DC_block_1970\DC_block_1970.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 4665 features and 6 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 1610795 ymin: 308338.5 xmax: 1629412 ymax: 329313.2
## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
b80_shp <- sf::st_read("block_shapes/DC_block_1980/DC_block_1980.shp")
## Reading layer `DC_block_1980' from data source 
##   `C:\Users\edwar\Documents\GitHub\hd_analysis\block_shapes\DC_block_1980\DC_block_1980.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 4627 features and 10 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 1610795 ymin: 308338.5 xmax: 1629412 ymax: 329313.2
## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
b90_shp <- sf::st_read("block_shapes/nhgis0092_shapefile_tl2000_110_block_1990/DC_block_1990.shp")
## Reading layer `DC_block_1990' from data source 
##   `C:\Users\edwar\Documents\GitHub\hd_analysis\block_shapes\nhgis0092_shapefile_tl2000_110_block_1990\DC_block_1990.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 5140 features and 5 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 1610795 ymin: 308338.5 xmax: 1629412 ymax: 329313.2
## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
b00_shp <- sf::st_read("block_shapes/nhgis0092_shapefile_tl2000_110_block_2000/DC_block_2000.shp")
## Reading layer `DC_block_2000' from data source 
##   `C:\Users\edwar\Documents\GitHub\hd_analysis\block_shapes\nhgis0092_shapefile_tl2000_110_block_2000\DC_block_2000.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 5626 features and 6 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 1610795 ymin: 308338.5 xmax: 1629412 ymax: 329313.2
## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
b10_shp <- sf::st_read("block_shapes/nhgis0092_shapefile_tl2010_110_block_2010/DC_block_2010.shp")
## Reading layer `DC_block_2010' from data source 
##   `C:\Users\edwar\Documents\GitHub\hd_analysis\block_shapes\nhgis0092_shapefile_tl2010_110_block_2010\DC_block_2010.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 6426 features and 18 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 1610830 ymin: 308504.6 xmax: 1629412 ymax: 329361
## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
b20_shp <- sf::st_read("block_shapes/nhgis0092_shapefile_tl2020_110_block_2020/DC_block_2020.shp")
## Reading layer `DC_block_2020' from data source 
##   `C:\Users\edwar\Documents\GitHub\hd_analysis\block_shapes\nhgis0092_shapefile_tl2020_110_block_2020\DC_block_2020.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 5935 features and 18 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 1610830 ymin: 308504.6 xmax: 1629412 ymax: 329396
## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
b70_df <- readr::read_csv("block_data/nhgis0093_ds96_1970_block.csv")
b80_df <- readr::read_csv("block_data/nhgis_ds104_1980_block_11.csv")
b90_df <- readr::read_csv("block_data/nhgis0092_ds120_1990_block.csv")
b00_df <- readr::read_csv("block_data/nhgis0092_ds147_2000_block.csv")
b10_df <- readr::read_csv("block_data/nhgis0092_ds172_2010_block.csv")
b20_df <- readr::read_csv("block_data/nhgis0092_ds258_2020_block.csv")

clean_block_data <- function(shp, df, shp_b_id, df_b_id, var_prefix, df_n_black, df_n_white, drop_var="", year) {
  shp <- shp %>% select(!!sym(shp_b_id)) %>% rename("geo_id" = !!sym(shp_b_id))
  if (drop_var!="") {df <- df %>% select(-!!sym(drop_var))}
  df <- df %>% 
    select(!!sym(df_b_id), starts_with(var_prefix)) %>%
    rowwise() %>%
    mutate(n_tot = sum(c_across(starts_with(var_prefix))))
    
  df <- df %>%
    rename("geo_id" = !!sym(df_b_id),
           "n_black" = !!sym(df_n_black),
           "n_white" = !!sym(df_n_white)) %>%
    select(-starts_with(var_prefix)) %>%
    mutate(n_other = n_tot - (n_black + n_white))
  
  shp <- dplyr::left_join(shp, df, by="geo_id")
  shp <- sf::st_transform(shp, 4326)
  shp$geo_area_meters <- sf::st_area(shp)
  shp$year <- year
  
  return(shp)
}

# 1970 block data has to be specially cleaned, see https://forum.ipums.org/t/race-ethnicity-data-at-a-block-level-from-1970/6178
b70_df$c_black <- b70_df$CM6001 + b70_df$CM6002
b70_df$c_other <- b70_df$CM6003 + b70_df$CM6004
b70_df$c_white <- b70_df$CM5001 + b70_df$CM5002 - b70_df$c_black - b70_df$c_other

b70_shp <- clean_block_data(b70_shp, b70_df, "GISJOIN", "GISJOIN", "c_", "c_black", "c_white", year=1970)
b80_shp <- clean_block_data(b80_shp, b80_df, "GISJOIN", "GISJOIN", "C9D0", "C9D002", "C9D001", year=1980)
b90_shp <- clean_block_data(b90_shp, b90_df, "GISJOIN", "GISJOIN", "EUY0", "EUY002", "EUY001", year=1990)
b00_shp <- clean_block_data(b00_shp, b00_df, "GISJOIN", "GISJOIN", "FYE0", "FYE002", "FYE001", year=2000)
b10_shp <- clean_block_data(b10_shp, b10_df, "GISJOIN", "GISJOIN", "H7X", "H7X003", "H7X002", "H7X001", year=2010)
b20_shp <- clean_block_data(b20_shp, b20_df, "GISJOIN", "GISJOIN", "U7J", "U7J003", "U7J002", "U7J001", year=2020)

rm(b70_df, b80_df, b90_df, b00_df, b10_df, b20_df)

plot(sf::st_geometry(b70_shp["geo_id"]))

Get the blocks in the HDs:

gc()
##           used  (Mb) gc trigger  (Mb) max used  (Mb)
## Ncells 1889412 101.0    4181665 223.4  3055264 163.2
## Vcells 7357919  56.2   18442415 140.8 14397296 109.9
mp = 0.25 # TODO: run analysis varying this factor
hd_geos70 <- get_geos_in_hd(b70_shp, min_pct = mp, year = 1970)
hd_geos80 <- get_geos_in_hd(b80_shp, min_pct = mp, year = 1980)
hd_geos90 <- get_geos_in_hd(b90_shp, min_pct = mp, year = 1990)
hd_geos00 <- get_geos_in_hd(b00_shp, min_pct = mp, year = 2000)
hd_geos10 <- get_geos_in_hd(b10_shp, min_pct = mp, year = 2010)
hd_geos20 <- get_geos_in_hd(b20_shp, min_pct = mp, year = 2020)


plot_geos_in_hds(b70_shp, hd_geos70$geos_in_hd$geo_id)

Get the blocks neighboring the HDs:

gc()
##           used  (Mb) gc trigger  (Mb) max used  (Mb)
## Ncells 1930122 103.1    4181665 223.4  4181665 223.4
## Vcells 7673593  58.6   18442415 140.8 14397296 109.9
buff_dist = .005
threshold = .1

nearby_blocks70 <- 
  get_neighbor_geos(hd_shp=hd_shp, geo_shp=b70_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos70,threshold, 1970)
nearby_blocks80 <- 
  get_neighbor_geos(hd_shp=hd_shp, geo_shp=b80_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos80,threshold, 1980)
nearby_blocks90 <- 
  get_neighbor_geos(hd_shp=hd_shp, geo_shp=b90_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos90,threshold, 1990)
nearby_blocks00 <- 
  get_neighbor_geos(hd_shp=hd_shp, geo_shp=b00_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos00,threshold, 2000)
nearby_blocks10 <- 
  get_neighbor_geos(hd_shp=hd_shp, geo_shp=b10_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos10,threshold, 2010)
nearby_blocks20 <- 
  get_neighbor_geos(hd_shp=hd_shp, geo_shp=b20_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos20,threshold, 2020)
plot_neighbor_geos(nearby_blocks70)

Now compare the demographics of the HD blocks and their neighbors in each year:

hd_comp_df <- dplyr::bind_rows(hd_geos70[[2]], hd_geos80[[2]], hd_geos90[[2]], 
                               hd_geos00[[2]], hd_geos10[[2]], hd_geos20[[2]],)

near_comp_df <- dplyr::bind_rows(nearby_blocks70[[3]], nearby_blocks80[[3]], nearby_blocks90[[3]], 
                                 nearby_blocks00[[3]], nearby_blocks10[[3]], nearby_blocks20[[3]],)

hd_comp_df <-
  hd_comp_df %>%
  select(-ends_with("_prorated"), -desig_year) %>%
  group_by(LABEL, desig_yet, year) %>%
  summarise_all(., sum) %>%
  mutate(pct_black = n_black / n_tot,
         pct_white = n_white / n_tot) %>%
  rename_with(~ paste0(., "_hd"))

near_comp_df <-
  near_comp_df %>%
  select(-desig_year) %>%
  group_by(LABEL, desig_yet, year) %>%
  mutate(pct_black = n_black / n_tot,
         pct_white = n_white / n_tot) %>%
  summarise_all(., sum) %>%
  rename_with(~ paste0(., "_near"))

comp_df <- dplyr::full_join(x = hd_comp_df, 
                            y = near_comp_df, 
                            by = c("LABEL_hd"="LABEL_near", 
                                   "desig_yet_hd"="desig_yet_near",
                                   "year_hd"="year_near")) %>%
  select(starts_with(c("LABEL", "desig_yet", "year", "pct_")))

comp_df <-
  comp_df %>%
  tidyr::pivot_longer(
    cols = starts_with("pct_"),
    names_to = c("group", "treatment_control"),
    names_pattern = "pct_([a-zA-Z]+)_([a-zA-Z]+)",
    values_to = "percent")

# remove HDs that are always HDs or always not HDs during the timeframe
comp_df <-
  comp_df %>%
  group_by(LABEL_hd) %>%
  mutate(mean_status = mean(desig_yet_hd, na.rm=T)) %>%
  filter(mean_status > 0) %>%
  filter(mean_status < 1)

comp_df$treated <- 0
comp_df$treated[comp_df$treatment_control=="hd"] <- 1

# diff in diff analysis for change in the % of black residents
diff_in_diff <- lm(percent ~                 # outcome: % of white or black residents
                     treated +               # "treatment": whether tract is in a HD or not
                     desig_yet_hd +          # pre/post indicator: whether HD was designated yet
                     treated:desig_yet_hd +  # D-in-D estimator: effect of treatment after implemented
                     as.factor(LABEL_hd)  +  # fixed effect for HD area
                     as.factor(year_hd),     # fixed effect for year
                   data = comp_df[comp_df$group=="black",])

summary(diff_in_diff)
## 
## Call:
## lm(formula = percent ~ treated + desig_yet_hd + treated:desig_yet_hd + 
##     as.factor(LABEL_hd) + as.factor(year_hd), data = comp_df[comp_df$group == 
##     "black", ])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.36570 -0.10361 -0.00381  0.08492  0.40910 
## 
## Coefficients:
##                                                   Estimate Std. Error t value
## (Intercept)                                       1.127628   0.046566  24.216
## treated                                          -0.081062   0.023017  -3.522
## desig_yet_hd                                     -0.037863   0.031670  -1.196
## as.factor(LABEL_hd)Blagden Alley/Naylor Court HD -0.418740   0.059032  -7.093
## as.factor(LABEL_hd)Bloomingdale HD               -0.176876   0.061167  -2.892
## as.factor(LABEL_hd)Capitol Hill HD               -0.437248   0.058304  -7.499
## as.factor(LABEL_hd)Cleveland Park HD             -0.891329   0.058487 -15.240
## as.factor(LABEL_hd)Downtown HD                   -0.599615   0.059931 -10.005
## as.factor(LABEL_hd)Dupont Circle HD              -0.801907   0.058304 -13.754
## as.factor(LABEL_hd)Financial HD                  -0.686026   0.059849 -11.463
## as.factor(LABEL_hd)Foggy Bottom HD               -0.895844   0.058487 -15.317
## as.factor(LABEL_hd)Foxhall HD                    -0.944705   0.059931 -15.763
## as.factor(LABEL_hd)Greater 14th St HD            -0.509427   0.059032  -8.630
## as.factor(LABEL_hd)Greater U St HD               -0.328614   0.059032  -5.567
## as.factor(LABEL_hd)GWU / Old West End HD         -0.925058   0.061167 -15.124
## as.factor(LABEL_hd)Kalorama Triangle HD          -0.767472   0.058487 -13.122
## as.factor(LABEL_hd)Kingman Park HD               -0.063425   0.061167  -1.037
## as.factor(LABEL_hd)Lafayette Square HD           -0.849017   0.061223 -13.868
## as.factor(LABEL_hd)LeDroit Park HD               -0.140810   0.058304  -2.415
## as.factor(LABEL_hd)Logan Circle HD               -0.450672   0.058304  -7.730
## as.factor(LABEL_hd)Massachusetts Ave HD          -0.867119   0.058304 -14.872
## as.factor(LABEL_hd)Meridian Hill                 -0.495563   0.061167  -8.102
## as.factor(LABEL_hd)Mt. Pleasant HD               -0.527034   0.058487  -9.011
## as.factor(LABEL_hd)Mt. Vernon Square HD          -0.258366   0.059032  -4.377
## as.factor(LABEL_hd)Pennsylvania Ave NHS          -0.502061   0.059032  -8.505
## as.factor(LABEL_hd)Shaw HD                       -0.399129   0.059032  -6.761
## as.factor(LABEL_hd)Sheridan-Kalorama HD          -0.896236   0.058487 -15.324
## as.factor(LABEL_hd)Sixteenth St HD               -0.550494   0.058304  -9.442
## as.factor(LABEL_hd)Strivers' Section HD          -0.516453   0.058487  -8.830
## as.factor(LABEL_hd)Takoma Park HD                -0.318863   0.058487  -5.452
## as.factor(LABEL_hd)Washington Heights HD         -0.672527   0.059931 -11.222
## as.factor(LABEL_hd)Woodley Park HD               -0.894963   0.059032 -15.161
## as.factor(year_hd)1980                            0.005156   0.027103   0.190
## as.factor(year_hd)1990                           -0.029076   0.029979  -0.970
## as.factor(year_hd)2000                           -0.089720   0.033808  -2.654
## as.factor(year_hd)2010                           -0.201509   0.035602  -5.660
## as.factor(year_hd)2020                           -0.250573   0.038071  -6.582
## treated:desig_yet_hd                             -0.058482   0.030542  -1.915
##                                                  Pr(>|t|)    
## (Intercept)                                       < 2e-16 ***
## treated                                          0.000491 ***
## desig_yet_hd                                     0.232763    
## as.factor(LABEL_hd)Blagden Alley/Naylor Court HD 8.49e-12 ***
## as.factor(LABEL_hd)Bloomingdale HD               0.004095 ** 
## as.factor(LABEL_hd)Capitol Hill HD               6.40e-13 ***
## as.factor(LABEL_hd)Cleveland Park HD              < 2e-16 ***
## as.factor(LABEL_hd)Downtown HD                    < 2e-16 ***
## as.factor(LABEL_hd)Dupont Circle HD               < 2e-16 ***
## as.factor(LABEL_hd)Financial HD                   < 2e-16 ***
## as.factor(LABEL_hd)Foggy Bottom HD                < 2e-16 ***
## as.factor(LABEL_hd)Foxhall HD                     < 2e-16 ***
## as.factor(LABEL_hd)Greater 14th St HD            2.96e-16 ***
## as.factor(LABEL_hd)Greater U St HD               5.51e-08 ***
## as.factor(LABEL_hd)GWU / Old West End HD          < 2e-16 ***
## as.factor(LABEL_hd)Kalorama Triangle HD           < 2e-16 ***
## as.factor(LABEL_hd)Kingman Park HD               0.300559    
## as.factor(LABEL_hd)Lafayette Square HD            < 2e-16 ***
## as.factor(LABEL_hd)LeDroit Park HD               0.016292 *  
## as.factor(LABEL_hd)Logan Circle HD               1.42e-13 ***
## as.factor(LABEL_hd)Massachusetts Ave HD           < 2e-16 ***
## as.factor(LABEL_hd)Meridian Hill                 1.16e-14 ***
## as.factor(LABEL_hd)Mt. Pleasant HD                < 2e-16 ***
## as.factor(LABEL_hd)Mt. Vernon Square HD          1.64e-05 ***
## as.factor(LABEL_hd)Pennsylvania Ave NHS          7.15e-16 ***
## as.factor(LABEL_hd)Shaw HD                       6.53e-11 ***
## as.factor(LABEL_hd)Sheridan-Kalorama HD           < 2e-16 ***
## as.factor(LABEL_hd)Sixteenth St HD                < 2e-16 ***
## as.factor(LABEL_hd)Strivers' Section HD           < 2e-16 ***
## as.factor(LABEL_hd)Takoma Park HD                1.00e-07 ***
## as.factor(LABEL_hd)Washington Heights HD          < 2e-16 ***
## as.factor(LABEL_hd)Woodley Park HD                < 2e-16 ***
## as.factor(year_hd)1980                           0.849244    
## as.factor(year_hd)1990                           0.332850    
## as.factor(year_hd)2000                           0.008356 ** 
## as.factor(year_hd)2010                           3.37e-08 ***
## as.factor(year_hd)2020                           1.91e-10 ***
## treated:desig_yet_hd                             0.056407 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1428 on 319 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.8353, Adjusted R-squared:  0.8162 
## F-statistic: 43.71 on 37 and 319 DF,  p-value: < 2.2e-16
# diff in diff analysis for change in the % of white residents
diff_in_diff <- lm(percent ~                 # outcome: % of white or black residents
                     treated +               # "treatment": whether tract is in a HD or not
                     desig_yet_hd +          # pre/post indicator: whether HD was designated yet
                     treated:desig_yet_hd +  # D-in-D estimator: effect of treatment after implemented
                     as.factor(LABEL_hd)  +  # fixed effect for HD area
                     as.factor(year_hd),     # fixed effect for year
                   data = comp_df[comp_df$group=="white",])

summary(diff_in_diff)
## 
## Call:
## lm(formula = percent ~ treated + desig_yet_hd + treated:desig_yet_hd + 
##     as.factor(LABEL_hd) + as.factor(year_hd), data = comp_df[comp_df$group == 
##     "white", ])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.35371 -0.08122 -0.00300  0.09326  0.42613 
## 
## Coefficients:
##                                                  Estimate Std. Error t value
## (Intercept)                                      -0.05391    0.04455  -1.210
## treated                                           0.06904    0.02202   3.135
## desig_yet_hd                                      0.02159    0.03030   0.712
## as.factor(LABEL_hd)Blagden Alley/Naylor Court HD  0.27904    0.05648   4.941
## as.factor(LABEL_hd)Bloomingdale HD                0.14035    0.05852   2.398
## as.factor(LABEL_hd)Capitol Hill HD                0.40590    0.05578   7.276
## as.factor(LABEL_hd)Cleveland Park HD              0.81521    0.05596  14.568
## as.factor(LABEL_hd)Downtown HD                    0.31479    0.05734   5.490
## as.factor(LABEL_hd)Dupont Circle HD               0.69654    0.05578  12.487
## as.factor(LABEL_hd)Financial HD                   0.59786    0.05726  10.441
## as.factor(LABEL_hd)Foggy Bottom HD                0.78872    0.05596  14.095
## as.factor(LABEL_hd)Foxhall HD                     0.87223    0.05734  15.211
## as.factor(LABEL_hd)Greater 14th St HD             0.41183    0.05648   7.291
## as.factor(LABEL_hd)Greater U St HD                0.23244    0.05648   4.115
## as.factor(LABEL_hd)GWU / Old West End HD          0.81044    0.05852  13.848
## as.factor(LABEL_hd)Kalorama Triangle HD           0.66525    0.05596  11.888
## as.factor(LABEL_hd)Kingman Park HD                0.04983    0.05852   0.851
## as.factor(LABEL_hd)Lafayette Square HD            0.75342    0.05858  12.862
## as.factor(LABEL_hd)LeDroit Park HD                0.09812    0.05578   1.759
## as.factor(LABEL_hd)Logan Circle HD                0.36616    0.05578   6.564
## as.factor(LABEL_hd)Massachusetts Ave HD           0.76664    0.05578  13.743
## as.factor(LABEL_hd)Meridian Hill                  0.34118    0.05852   5.830
## as.factor(LABEL_hd)Mt. Pleasant HD                0.35597    0.05596   6.361
## as.factor(LABEL_hd)Mt. Vernon Square HD           0.18100    0.05648   3.205
## as.factor(LABEL_hd)Pennsylvania Ave NHS           0.39792    0.05648   7.045
## as.factor(LABEL_hd)Shaw HD                        0.26453    0.05648   4.684
## as.factor(LABEL_hd)Sheridan-Kalorama HD           0.80853    0.05596  14.449
## as.factor(LABEL_hd)Sixteenth St HD                0.41875    0.05578   7.507
## as.factor(LABEL_hd)Strivers' Section HD           0.38082    0.05596   6.805
## as.factor(LABEL_hd)Takoma Park HD                 0.24564    0.05596   4.390
## as.factor(LABEL_hd)Washington Heights HD          0.55117    0.05734   9.612
## as.factor(LABEL_hd)Woodley Park HD                0.80725    0.05648  14.292
## as.factor(year_hd)1980                           -0.03912    0.02593  -1.509
## as.factor(year_hd)1990                           -0.02383    0.02868  -0.831
## as.factor(year_hd)2000                           -0.01792    0.03235  -0.554
## as.factor(year_hd)2010                            0.09108    0.03406   2.674
## as.factor(year_hd)2020                            0.05635    0.03643   1.547
## treated:desig_yet_hd                              0.07479    0.02922   2.559
##                                                  Pr(>|t|)    
## (Intercept)                                       0.22720    
## treated                                           0.00188 ** 
## desig_yet_hd                                      0.47674    
## as.factor(LABEL_hd)Blagden Alley/Naylor Court HD 1.26e-06 ***
## as.factor(LABEL_hd)Bloomingdale HD                0.01705 *  
## as.factor(LABEL_hd)Capitol Hill HD               2.68e-12 ***
## as.factor(LABEL_hd)Cleveland Park HD              < 2e-16 ***
## as.factor(LABEL_hd)Downtown HD                   8.22e-08 ***
## as.factor(LABEL_hd)Dupont Circle HD               < 2e-16 ***
## as.factor(LABEL_hd)Financial HD                   < 2e-16 ***
## as.factor(LABEL_hd)Foggy Bottom HD                < 2e-16 ***
## as.factor(LABEL_hd)Foxhall HD                     < 2e-16 ***
## as.factor(LABEL_hd)Greater 14th St HD            2.44e-12 ***
## as.factor(LABEL_hd)Greater U St HD               4.93e-05 ***
## as.factor(LABEL_hd)GWU / Old West End HD          < 2e-16 ***
## as.factor(LABEL_hd)Kalorama Triangle HD           < 2e-16 ***
## as.factor(LABEL_hd)Kingman Park HD                0.39515    
## as.factor(LABEL_hd)Lafayette Square HD            < 2e-16 ***
## as.factor(LABEL_hd)LeDroit Park HD                0.07953 .  
## as.factor(LABEL_hd)Logan Circle HD               2.12e-10 ***
## as.factor(LABEL_hd)Massachusetts Ave HD           < 2e-16 ***
## as.factor(LABEL_hd)Meridian Hill                 1.36e-08 ***
## as.factor(LABEL_hd)Mt. Pleasant HD               6.94e-10 ***
## as.factor(LABEL_hd)Mt. Vernon Square HD           0.00149 ** 
## as.factor(LABEL_hd)Pennsylvania Ave NHS          1.15e-11 ***
## as.factor(LABEL_hd)Shaw HD                       4.18e-06 ***
## as.factor(LABEL_hd)Sheridan-Kalorama HD           < 2e-16 ***
## as.factor(LABEL_hd)Sixteenth St HD               6.11e-13 ***
## as.factor(LABEL_hd)Strivers' Section HD          5.00e-11 ***
## as.factor(LABEL_hd)Takoma Park HD                1.55e-05 ***
## as.factor(LABEL_hd)Washington Heights HD          < 2e-16 ***
## as.factor(LABEL_hd)Woodley Park HD                < 2e-16 ***
## as.factor(year_hd)1980                            0.13237    
## as.factor(year_hd)1990                            0.40671    
## as.factor(year_hd)2000                            0.57995    
## as.factor(year_hd)2010                            0.00788 ** 
## as.factor(year_hd)2020                            0.12287    
## treated:desig_yet_hd                              0.01095 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1366 on 319 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.8179, Adjusted R-squared:  0.7967 
## F-statistic: 38.71 on 37 and 319 DF,  p-value: < 2.2e-16
plot_ly(comp_df[comp_df$group=="black",] %>% arrange(LABEL_hd, year_hd),
                   x = ~year_hd,
                   y = ~percent,
                   linetype = ~as.factor(treatment_control),
                   hovertext = ~paste0("Tract types: ", treatment_control, "\nDesignated yet: ", desig_yet_hd),
                   color = ~LABEL_hd, # Group lines by this variable
                   type = "scatter",
                   mode = "lines+markers") %>%
      layout(title = "% black residents in each historic district areas before and after HD created")
plot_ly(comp_df[comp_df$group=="white",] %>% arrange(LABEL_hd, year_hd),
                   x = ~year_hd,
                   y = ~percent,
                   linetype = ~as.factor(treatment_control),
                   hovertext = ~paste0("Tract types: ", treatment_control, "\nDesignated yet: ", desig_yet_hd),
                   color = ~LABEL_hd, # Group lines by this variable
                   type = "scatter",
                   mode = "lines+markers") %>%
      layout(title = "% white residents in each historic district areas before and after HD created")